#packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#reading files
# Load every table of the dataset from the local "f1data" directory.
# Each CSV file becomes a data frame named after the file.
circuits              <- read.csv("f1data/circuits.csv")
constructor_results   <- read.csv("f1data/constructor_results.csv")
constructor_standings <- read.csv("f1data/constructor_standings.csv")
constructors          <- read.csv("f1data/constructors.csv")
driver_standings      <- read.csv("f1data/driver_standings.csv")
drivers               <- read.csv("f1data/drivers.csv")
lap_times             <- read.csv("f1data/lap_times.csv")
pit_stops             <- read.csv("f1data/pit_stops.csv")
qualifying            <- read.csv("f1data/qualifying.csv")
races                 <- read.csv("f1data/races.csv")
results               <- read.csv("f1data/results.csv")
seasons               <- read.csv("f1data/seasons.csv")
sprint_results        <- read.csv("f1data/sprint_results.csv")
status                <- read.csv("f1data/status.csv")
#Functions

#convert "min:sec.ms" string to total seconds (as a number)
# Convert lap-time strings to total seconds.
#
# Accepted forms are "ss.sss", "m:ss.sss" and "h:mm:ss.sss". Anything else
# (NA, "", the "\N" missing-value marker, free text) yields NA_real_ without
# raising a coercion warning.
#
# time_str: character vector (other types are converted with as.character()).
# Returns:  numeric vector of the same length as time_str.
time_to_seconds <- function(time_str) {
  time_str <- trimws(as.character(time_str))
  total <- rep(NA_real_, length(time_str))

  # Only well-formed values are parsed, so as.numeric() never sees junk.
  well_formed <- !is.na(time_str) &
    grepl("^([0-9]+:){0,2}[0-9]+(\\.[0-9]*)?$", time_str)

  if (any(well_formed)) {
    total[well_formed] <- vapply(
      strsplit(time_str[well_formed], ":", fixed = TRUE),
      function(fields) {
        values <- as.numeric(fields)
        # Right-most field is seconds; each field to its left is worth 60x more.
        sum(values * 60^(rev(seq_along(values)) - 1))
      },
      numeric(1)
    )
  }
  total
}

#convert total seconds back to "min:sec.ms" format
# Format a duration in seconds as "m:ss.sss".
#
# The value is rounded to whole milliseconds before it is split into minutes
# and seconds, so a value just under a minute boundary prints as "1:00.000"
# rather than "0:60.000". Seconds are zero-padded to two digits.
#
# total_seconds: numeric vector of durations in seconds.
# Returns:       character vector of the same length; NA, NaN and infinite
#                inputs give NA_character_.
seconds_to_time <- function(total_seconds) {
  total_ms <- round(as.numeric(total_seconds) * 1000)

  # Format unusable entries as 0 for now and blank them out afterwards.
  unusable <- !is.finite(total_ms)
  total_ms[unusable] <- 0

  formatted <- sprintf(
    "%.0f:%06.3f",
    total_ms %/% 60000,
    (total_ms %% 60000) / 1000
  )
  formatted[unusable] <- NA_character_
  formatted
}

#average lap times and mutate a new column
# Add the average qualifying time of every row, both in seconds
# (average_seconds) and as formatted text (average_time).
#
# time_to_seconds() and seconds_to_time() already work on whole vectors, so
# they are applied to the columns directly instead of once per row through
# sapply(); the per-row calls were slow and raised one coercion warning for
# every unparsable entry.
qualifying_average <- qualifying %>%
  mutate(
    q1_seconds = time_to_seconds(q1),
    q2_seconds = time_to_seconds(q2),
    q3_seconds = time_to_seconds(q3),
    average_seconds = case_when(
      # All three sessions have a time.
      !is.na(q1_seconds) & !is.na(q2_seconds) & !is.na(q3_seconds) ~
        (q1_seconds + q2_seconds + q3_seconds) / 3,
      # Only the first two sessions have a time.
      !is.na(q1_seconds) & !is.na(q2_seconds) & is.na(q3_seconds) ~
        (q1_seconds + q2_seconds) / 2,
      # Otherwise fall back to the first session (NA when that is missing too).
      TRUE ~ q1_seconds
    ),
    average_time = seconds_to_time(average_seconds)
  ) %>%
  # The per-session helper columns are not needed downstream.
  select(-q1_seconds, -q2_seconds, -q3_seconds)
## Warning: There were 11600 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `q1_seconds = sapply(q1, time_to_seconds)`.
## Caused by warning in `FUN()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 11599 remaining warnings.
#Joins to add years to data frames for graphing

#qualifying data
# Race metadata (which carries the season year) joined to the qualifying
# averages and then to the constructor table. The first join is a natural join
# on the shared raceId column. `name` exists in both races and constructors,
# so after the second join the constructor name is `name.y`.
qualifying_by_year <- races %>%
  left_join(qualifying_average) %>%
  left_join(constructors, by = join_by(constructorId)) %>%
  mutate(name.y = as.character(name.y))
## Joining with `by = join_by(raceId)`

#results data by year
# Race results with the fastest lap expressed in seconds.
# time_to_seconds() is vectorised, so the whole column is converted in a
# single call rather than row by row through sapply().
results_LapTimeSeconds <- results %>%
  mutate(fastestLapTime_seconds = time_to_seconds(fastestLapTime))
## Warning: There were 18507 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `fastestLapTime_seconds = sapply(fastestLapTime,
##   time_to_seconds)`.
## Caused by warning in `FUN()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 18506 remaining warnings.
# Race metadata joined to the results (by raceId) and then to the constructor
# table (by constructorId), so every result row carries its season year and
# team name.
results_by_year <- races %>%
  left_join(results_LapTimeSeconds, by = join_by(raceId)) %>%
  left_join(constructors, by = join_by(constructorId))

#constructor standings by year
# Race metadata joined to the constructor standings (natural join on raceId).
constructor_standings_by_year <- left_join(races, constructor_standings)
## Joining with `by = join_by(raceId)`
# Standings rows of the teams currently on the grid. Rows whose constructor
# name is missing are dropped as well, exactly as with a chain of `==` tests.
relevant_results <- constructor_standings_by_year %>%
  left_join(constructors, by = join_by(constructorId)) %>%
  filter(
    name.y %in% c(
      "Alpine F1 Team", "Aston Martin", "Ferrari", "Haas F1 Team", "McLaren",
      "Mercedes", "AlphaTauri", "Red Bull", "Sauber", "Williams"
    )
  )
#setting color palette
# Team colours, keyed by constructor name.
#
# The colour values keep their original positions; only the names attached to
# them are corrected. The colours are listed in alphabetical team order, but
# the names used to be attached in a different order, so e.g. "#FF8000" (the
# colour this script uses for the McLaren plots) was labelled "Mercedes".
# NOTE(review): the colour-to-team pairing is inferred from that ordering;
# confirm against the intended team colours.
colorpalette <- c(
  "AlphaTauri"     = "#041F30",
  "Alpine F1 Team" = "#FD4BC7",
  "Aston Martin"   = "#00352F",
  "Ferrari"        = "#A6051A",
  "Haas F1 Team"   = "#AEAEAE",
  "McLaren"        = "#FF8000",
  "Mercedes"       = "#111111",
  "Red Bull"       = "#223971",
  "Sauber"         = "#00E701",
  "Williams"       = "#00A3E0"
)
#Graphs for Part A: Overview of all current F1 teams

#average qual time by year
# Smoothed average qualifying time per season, one line per current team.
qualifying_by_year %>%
  filter(
    name.y %in% c(
      "Alpine F1 Team", "Aston Martin", "Ferrari", "Haas F1 Team", "McLaren",
      "Mercedes", "AlphaTauri", "Red Bull", "Sauber", "Williams"
    ),
    # Rows without a usable qualifying time cannot be smoothed; remove them
    # here instead of leaving it to ggplot (which warns about each drop).
    !is.na(average_seconds)
  ) %>%
  ggplot(aes(x = year, y = average_seconds, color = name.y)) +
  geom_smooth(se = FALSE) +
  # The axis title promises min:ss, so format the tick labels accordingly.
  # Rounding to whole seconds first keeps the seconds part below 60.
  scale_y_continuous(
    labels = function(x) {
      whole <- round(x)
      sprintf("%02d:%02d", whole %/% 60, whole %% 60)
    }
  ) +
  labs(
    title = "Average Qualifying Times by Season",
    subtitle = "All current teams",
    x = "Season",
    y = "Average Qualifying Time (min:ss)",
    color = "Teams"
  ) +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.background = element_rect(fill = "black")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 77 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2023
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1.9999e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 4.6302e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 4.6302e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1

#wins by year for current teams
# Every race joined to the current-team standings, then to the constructor
# table (both joins kept as they were so overview_results is unchanged).
overview_results <- left_join(x = races, y = relevant_results, by = join_by(raceId))
overview_results <- left_join(overview_results, constructors, by = join_by(constructorId))

# Season win total per team: the largest `wins` value seen in that season.
# NOTE(review): presumably `wins` is a running total within a season - confirm.
#
# Races with no matching current-team row have name.y = NA. They are removed
# BEFORE summarising; summarising them first made max() run on empty input
# (the "-Inf" warnings) only for those groups to be dropped afterwards.
# `.groups = "drop_last"` keeps the result grouped by name.y, as before,
# without the informational message.
constructor_wins <- overview_results %>%
  drop_na(name.y) %>%
  group_by(name.y, year.x) %>%
  summarise(total_wins = max(wins, na.rm = TRUE), .groups = "drop_last")
## Warning: There were 9 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `total_wins = max(wins, na.rm = TRUE)`.
## ℹ In group 253: `name.y = NA` and `year.x = 1950`.
## Caused by warning in `max()`:
## ! no non-missing arguments to max; returning -Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 8 remaining warnings.
## `summarise()` has grouped output by 'name.y'. You can override using the
## `.groups` argument.
#overview_results$win = overview_results$position == '1'
#constructor_wins = overview_results %>% filter(win == TRUE) %>% 
  #group_by(name.y, year) %>% count(win) %>% select(win_count = n)

# Stacked bars of season wins, one colour per current team.
ggplot(constructor_wins, aes(x = year.x, y = total_wins, fill = name.y)) +
  # Seasons are one unit apart on the x axis, so bars must be narrower than 1.
  # The previous width of 2 made neighbouring seasons overlap, which
  # position_stack() warns about and which muddles the stacked totals.
  geom_col(color = "#616161", width = 0.9, alpha = 1 / 2) +
  labs(
    title = "Wins by Season",
    subtitle = "All current teams",
    x = "Season",
    y = "Wins",
    fill = "Teams"
  ) +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.background = element_rect(fill = "black")
  )
## Warning: `position_stack()` requires non-overlapping x intervals.

#fastest lap time by year
# Smoothed fastest-lap time per season, one line per current team.
results_by_year %>%
  filter(
    name.y %in% c(
      "Alpine F1 Team", "Aston Martin", "Ferrari", "Haas F1 Team", "McLaren",
      "Mercedes", "AlphaTauri", "Red Bull", "Sauber", "Williams"
    )
  ) %>%
  # Results without a recorded fastest lap cannot be plotted.
  drop_na(fastestLapTime_seconds) %>%
  ggplot(aes(x = year, y = fastestLapTime_seconds, color = name.y)) +
  geom_smooth(alpha = 1 / 5, se = FALSE) +
  # The axis title promises min:ss, so format the tick labels accordingly.
  # Rounding to whole seconds first keeps the seconds part below 60.
  scale_y_continuous(
    labels = function(x) {
      whole <- round(x)
      sprintf("%02d:%02d", whole %/% 60, whole %% 60)
    }
  ) +
  labs(
    title = "Fastest Lap Times by Season",
    subtitle = "All current teams",
    x = "Season",
    y = "Lap Time (min:ss)",
    color = "Teams"
  ) +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.background = element_rect(fill = "black")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2023
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 6.2987e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2021
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 8.5326e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 4.0602
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 2024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 2.015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 4.6944e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 1

# graphs for Part B: A closer look at Ferrari

#filtering for ferrari
# Ferrari rows (constructorId 6) of the qualifying, results and standings tables.
ferrari_qualifying <- filter(qualifying_by_year, constructorId == 6)
ferrari_results <- filter(results_by_year, constructorId == 6)
ferrari_standings <- filter(constructor_standings_by_year, constructorId == 6)

# McLaren rows (constructorId 1) of the same three tables.
Mclaren_qualifying <- filter(qualifying_by_year, constructorId == 1)
Mclaren_results <- filter(results_by_year, constructorId == 1)
Mclaren_standings <- filter(constructor_standings_by_year, constructorId == 1)

#summarizing for average data per season ferrari
# Per-season Ferrari aggregates: one row per year in each table.

# Mean of the per-row average qualifying time, in seconds.
ferrari_avg_qualifyingTime <- ferrari_qualifying %>%
  group_by(year) %>%
  summarise(avg_time_per_season = mean(average_seconds, na.rm = TRUE))

# Mean fastest-lap time in seconds; seasons without any value are removed.
ferrari_avg_LapTime <- ferrari_results %>%
  group_by(year) %>%
  summarise(
    avg_fastestLapTime_per_season = mean(fastestLapTime_seconds, na.rm = TRUE)
  ) %>%
  filter(!is.na(avg_fastestLapTime_per_season))

# Mean fastest-lap speed. as.numeric() turns non-numeric entries into NA
# (hence the coercion warnings); the mean then ignores them.
ferrari_avg_fastestLapSpeed <- ferrari_results %>%
  group_by(year) %>%
  summarise(
    avg_fastestLapSpeed_per_season = mean(as.numeric(fastestLapSpeed), na.rm = TRUE)
  ) %>%
  filter(!is.na(avg_fastestLapSpeed_per_season))
## Warning: There were 68 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `avg_fastestLapSpeed_per_season =
##   mean(as.numeric(fastestLapSpeed), na.rm = TRUE)`.
## ℹ In group 1: `year = 1950`.
## Caused by warning in `mean()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 67 remaining warnings.
# Mean finishing position, taken from positionOrder.
ferrari_avg_finishPosition <- ferrari_results %>%
  group_by(year) %>%
  summarise(avg_finishPosition_per_season = mean(positionOrder, na.rm = TRUE)) %>%
  filter(!is.na(avg_finishPosition_per_season))

# Mean of the `rank` column after numeric coercion.
ferrari_avg_rank <- ferrari_results %>%
  group_by(year) %>%
  summarise(avg_rank_per_season = mean(as.numeric(rank), na.rm = TRUE)) %>%
  filter(!is.na(avg_rank_per_season))
## Warning: There were 57 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `avg_rank_per_season = mean(as.numeric(rank), na.rm = TRUE)`.
## ℹ In group 1: `year = 1950`.
## Caused by warning in `mean()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 56 remaining warnings.
# Mean race duration, taken from the `milliseconds` column.
ferrari_avg_runTime <- ferrari_results %>%
  group_by(year) %>%
  summarise(avg_runTime_per_season = mean(as.numeric(milliseconds), na.rm = TRUE)) %>%
  filter(!is.na(avg_runTime_per_season))
## Warning: There were 75 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `avg_runTime_per_season = mean(as.numeric(milliseconds), na.rm =
##   TRUE)`.
## ℹ In group 1: `year = 1950`.
## Caused by warning in `mean()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 74 remaining warnings.
# Season win total: the largest `wins` value in the standings for that year.
ferrari_total_wins <- ferrari_standings %>%
  group_by(year) %>%
  summarise(total_wins = max(wins, na.rm = TRUE))


#summarizing for average data per season Mclaren
# Per-season McLaren aggregates: one row per year in each table.

# Mean of the per-row average qualifying time, in seconds.
Mclaren_avg_qualifyingTime <- Mclaren_qualifying %>%
  group_by(year) %>%
  summarise(avg_time_per_season = mean(average_seconds, na.rm = TRUE))

# Mean fastest-lap time in seconds; seasons without any value are removed.
Mclaren_avg_LapTime <- Mclaren_results %>%
  group_by(year) %>%
  summarise(
    avg_fastestLapTime_per_season = mean(fastestLapTime_seconds, na.rm = TRUE)
  ) %>%
  filter(!is.na(avg_fastestLapTime_per_season))

# Mean fastest-lap speed. as.numeric() turns non-numeric entries into NA
# (hence the coercion warnings); the mean then ignores them.
Mclaren_avg_fastestLapSpeed <- Mclaren_results %>%
  group_by(year) %>%
  summarise(
    avg_fastestLapSpeed_per_season = mean(as.numeric(fastestLapSpeed), na.rm = TRUE)
  ) %>%
  filter(!is.na(avg_fastestLapSpeed_per_season))
## Warning: There were 49 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `avg_fastestLapSpeed_per_season =
##   mean(as.numeric(fastestLapSpeed), na.rm = TRUE)`.
## ℹ In group 1: `year = 1968`.
## Caused by warning in `mean()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 48 remaining warnings.
# Mean finishing position, taken from positionOrder.
Mclaren_avg_finishPosition <- Mclaren_results %>%
  group_by(year) %>%
  summarise(avg_finishPosition_per_season = mean(positionOrder, na.rm = TRUE)) %>%
  filter(!is.na(avg_finishPosition_per_season))

# Mean of the `rank` column after numeric coercion.
Mclaren_avg_rank <- Mclaren_results %>%
  group_by(year) %>%
  summarise(avg_rank_per_season = mean(as.numeric(rank), na.rm = TRUE)) %>%
  filter(!is.na(avg_rank_per_season))
## Warning: There were 38 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `avg_rank_per_season = mean(as.numeric(rank), na.rm = TRUE)`.
## ℹ In group 1: `year = 1968`.
## Caused by warning in `mean()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 37 remaining warnings.
# Mean race duration, taken from the `milliseconds` column.
Mclaren_avg_runTime <- Mclaren_results %>%
  group_by(year) %>%
  summarise(avg_runTime_per_season = mean(as.numeric(milliseconds), na.rm = TRUE)) %>%
  filter(!is.na(avg_runTime_per_season))
## Warning: There were 55 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `avg_runTime_per_season = mean(as.numeric(milliseconds), na.rm =
##   TRUE)`.
## ℹ In group 1: `year = 1968`.
## Caused by warning in `mean()`:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 54 remaining warnings.
# Season win total: the largest `wins` value in the standings for that year.
Mclaren_total_wins <- Mclaren_standings %>%
  group_by(year) %>%
  summarise(total_wins = max(wins, na.rm = TRUE))


#ferrari qualifying times by year
# Ferrari season-average qualifying time: points, a loess curve with its
# confidence band, and a dashed linear trend.
ferrari_avg_qualifyingTime %>%
  ggplot(aes(x = year, y = avg_time_per_season)) +
  geom_point(color = '#FFF200') +
  geom_smooth(color = '#EF1A2D') +
  geom_smooth(linetype = "longdash", method = lm, se = FALSE, color = '#00A551') +
  labs(
    title = "Ferrari Average Qualifying Times by Season",
    subtitle = "Measured in min:ss | Lower is better | From 1994 to 2024",
    x = "Season",
    y = "Average Qualifying Time"
  ) +
  scale_x_continuous(
    limits = c(min(ferrari_avg_qualifyingTime$year), max(ferrari_avg_qualifyingTime$year)),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    # Round to whole seconds BEFORE splitting into minutes and seconds.
    # Rounding only the remainder could print ":60" (119.7 s gave "01:60").
    labels = function(x) {
      whole <- round(x)
      sprintf("%02d:%02d", whole %/% 60, whole %% 60)
    }
  ) +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#Ferrari fastest lap times by year
# Ferrari season-average fastest-lap time: points, a loess curve with its
# confidence band, and a dashed linear trend.
ferrari_avg_LapTime %>%
  ggplot(aes(x = year, y = avg_fastestLapTime_per_season)) +
  geom_point(color = '#FFF200') +
  geom_smooth(color = '#EF1A2D') +
  geom_smooth(linetype = "longdash", method = lm, se = FALSE, color = '#00A551') +
  labs(
    title = "Ferrari Fastest Lap Times by Season",
    subtitle = "Measured in min:ss | Lower is better | From 2004 - 2024",
    x = "Season",
    y = "Average Fastest Lap Time"
  ) +
  scale_x_continuous(
    # Take the limits from the data. The previous fixed c(2005, 2025) window
    # silently discarded one season from the points and from both fits.
    limits = c(min(ferrari_avg_LapTime$year), max(ferrari_avg_LapTime$year) + 1),
    expand = c(0, 0.5)
  ) +
  scale_y_continuous(
    # Round to whole seconds BEFORE splitting into minutes and seconds.
    # Rounding only the remainder could print ":60" (119.7 s gave "01:60").
    labels = function(x) {
      whole <- round(x)
      sprintf("%02d:%02d", whole %/% 60, whole %% 60)
    }
  ) +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

#Ferrari fastest lap speed over time
# Ferrari season-average fastest-lap speed: km/h on the left axis and the same
# values multiplied by 0.621371 (mph) on the right axis.
ggplot(
  ferrari_avg_fastestLapSpeed,
  aes(x = year, y = avg_fastestLapSpeed_per_season)
) +
  geom_point(color = "#FFF200") +
  geom_smooth(color = "#EF1A2D") +
  geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "#00A551") +
  scale_y_continuous(
    name = "Average Fastest Lap Speed (km/h)",
    sec.axis = sec_axis(~ . * 0.621371, name = "Average Fastest Lap Speed (mph)")
  ) +
  labs(
    title = "Ferrari Fastest Lap Speed by Season",
    subtitle = "Measured in km/h and mph | From 2004 to 2024",
    x = "Season"
  ) +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    axis.title.y.right = element_text(color = "white"),
    axis.text.y.right = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#Ferrari finish position over time
# Ferrari season-average finishing position: points, loess curve and a dashed
# linear trend.
ggplot(
  ferrari_avg_finishPosition,
  aes(x = year, y = avg_finishPosition_per_season)
) +
  geom_point(color = "#FFF200") +
  geom_smooth(color = "#EF1A2D") +
  geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "#00A551") +
  labs(
    title = "Ferrari Average Finish Position by Season",
    subtitle = "Lower is better | From 1950 to 2024",
    x = "Season",
    y = "Average Finish Position"
  ) +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#Ferrari rank over time
# Ferrari season-average `rank`: points, loess curve and a dashed linear
# trend. The x axis spans the data years, with one extra year on the right.
ggplot(ferrari_avg_rank, aes(x = year, y = avg_rank_per_season)) +
  geom_point(color = "#FFF200") +
  geom_smooth(color = "#EF1A2D") +
  geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "#00A551") +
  scale_x_continuous(
    limits = c(min(ferrari_avg_rank$year), max(ferrari_avg_rank$year) + 1),
    expand = c(0, .5)
  ) +
  labs(
    title = "Ferrari Average Rank by Season",
    subtitle = "Lower is better | From 2004 to 2024",
    x = "Season",
    y = "Average Finish Rank"
  ) +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#Ferrari race run time over time
# Ferrari season-average race duration. The mean is in milliseconds and is
# divided by 3600000 to plot hours.
ggplot(
  ferrari_avg_runTime,
  aes(x = year, y = as.numeric(avg_runTime_per_season) / 3600000)
) +
  geom_point(color = "#FFF200") +
  geom_smooth(color = "#EF1A2D") +
  geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "#00A551") +
  labs(
    title = "Ferrari Average Runtime by Season",
    subtitle = "Converted from milliseconds to hours | From 1950 to 2024",
    x = "Season",
    y = "Average Runtime (Hours)"
  ) +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#Ferrari wins over time
# Ferrari wins per season: points joined by a line, plus a dashed linear
# trend. The y axis has a tick for every win count; the x axis has a tick
# every five years.
ggplot(ferrari_total_wins, aes(x = year, y = total_wins)) +
  geom_point(color = "#FFF200") +
  geom_line(color = "#EF1A2D") +
  geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "#00A551") +
  scale_x_continuous(breaks = seq(1955, 2025, by = 5)) +
  scale_y_continuous(
    breaks = seq(0, max(ferrari_total_wins$total_wins), by = 1)
  ) +
  labs(
    title = "Ferrari Wins by Season",
    subtitle = "From 1958 to 2024",
    x = "Season",
    y = "Total Wins"
  ) +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    plot.title = element_text(color = "white", face = "bold"),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
## `geom_smooth()` using formula = 'y ~ x'

#Mclaren qualifying times by year
# McLaren season-average qualifying time: points, a loess curve with its
# confidence band, and a dashed linear trend.
Mclaren_avg_qualifyingTime %>%
  ggplot(aes(x = year, y = avg_time_per_season)) +
  geom_point(color = '#FF8000') +
  geom_smooth(color = '#47c7fc') +
  geom_smooth(linetype = "longdash", method = lm, se = FALSE, color = '#00A551') +
  labs(
    title = "Mclaren Average Qualifying Times by Season",
    subtitle = "Measured in min:ss | Lower is better | From 1994 to 2024",
    x = "Season",
    y = "Average Qualifying Time"
  ) +
  scale_x_continuous(
    # Limits come from the McLaren table being plotted. They used to be read
    # from the Ferrari table, which would clip McLaren seasons outside
    # Ferrari's year range.
    limits = c(min(Mclaren_avg_qualifyingTime$year), max(Mclaren_avg_qualifyingTime$year)),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    # Round to whole seconds BEFORE splitting into minutes and seconds.
    # Rounding only the remainder could print ":60" (119.7 s gave "01:60").
    labels = function(x) {
      whole <- round(x)
      sprintf("%02d:%02d", whole %/% 60, whole %% 60)
    }
  ) +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#Mclaren fastest lap times by year
# McLaren season-average fastest-lap time: points, a loess curve with its
# confidence band, and a dashed linear trend.
Mclaren_avg_LapTime %>%
  ggplot(aes(x = year, y = avg_fastestLapTime_per_season)) +
  geom_point(color = '#FF8000') +
  geom_smooth(color = '#47c7fc') +
  geom_smooth(linetype = "longdash", method = lm, se = FALSE, color = '#00A551') +
  labs(
    title = "Mclaren Fastest Lap Times by Season",
    subtitle = "Measured in min:ss | Lower is better | From 2004 - 2024",
    x = "Season",
    y = "Average Fastest Lap Time"
  ) +
  scale_x_continuous(
    # Take the limits from the data. The previous fixed c(2005, 2025) window
    # silently discarded one season from the points and from both fits.
    limits = c(min(Mclaren_avg_LapTime$year), max(Mclaren_avg_LapTime$year) + 1),
    expand = c(0, 0.5)
  ) +
  scale_y_continuous(
    # Round to whole seconds BEFORE splitting into minutes and seconds.
    # Rounding only the remainder could print ":60" (119.7 s gave "01:60").
    labels = function(x) {
      whole <- round(x)
      sprintf("%02d:%02d", whole %/% 60, whole %% 60)
    }
  ) +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

#Mclaren fastest lap speed over time
# McLaren season-average fastest-lap speed: km/h on the left axis and the same
# values multiplied by 0.621371 (mph) on the right axis.
ggplot(
  Mclaren_avg_fastestLapSpeed,
  aes(x = year, y = avg_fastestLapSpeed_per_season)
) +
  geom_point(color = "#FF8000") +
  geom_smooth(color = "#47c7fc") +
  geom_smooth(method = "lm", se = FALSE, linetype = "longdash", color = "#00A551") +
  scale_y_continuous(
    name = "Average Fastest Lap Speed (km/h)",
    sec.axis = sec_axis(~ . * 0.621371, name = "Average Fastest Lap Speed (mph)")
  ) +
  labs(
    title = "Mclaren Fastest Lap Speed by Season",
    subtitle = "Measured in km/h and mph | From 2004 to 2024",
    x = "Season"
  ) +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(color = "white", size = 12, margin = margin(b = 10)),
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    axis.title.y.right = element_text(color = "white"),
    axis.text.y.right = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# McLaren average finish position over time
# Points are the per-season averages; the solid curve is geom_smooth()'s
# default smoother and the dashed line is a linear (lm) trend without a
# confidence band.
ggplot(
  Mclaren_avg_finishPosition,
  aes(x = year, y = avg_finishPosition_per_season)
) +
  geom_point(color = "#FF8000") +
  geom_smooth(color = "#47c7fc") +
  geom_smooth(
    method = lm,
    se = FALSE,
    linetype = "longdash",
    color = "#00A551"
  ) +
  labs(
    title = "Mclaren Average Finish Position by Season",
    subtitle = "Lower is better | From 1950 to 2024",
    x = "Season",
    y = "Average Finish Position"
  ) +
  theme(
    # black canvas with dim grid lines
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    # white text on the dark background
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(
      color = "white",
      size = 12,
      margin = margin(b = 10)
    ),
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# McLaren average rank over time
# Points are the per-season averages; the solid curve is geom_smooth()'s
# default smoother and the dashed line is a linear (lm) trend without a
# confidence band.
Mclaren_avg_rank %>%
  ggplot(aes(x = year, y = avg_rank_per_season)) +
  geom_point(color = "#FF8000") +
  geom_smooth(color = "#47c7fc") +
  geom_smooth(
    linetype = "longdash",
    method = lm,
    se = FALSE,
    color = "#00A551"
  ) +
  labs(
    title = "McLaren Average Rank by Season",
    subtitle = "Lower is better | From 2004 to 2024",
    x = "Season",
    y = "Average Finish Rank"
  ) +
  scale_x_continuous(
    # Limits come from the McLaren data being plotted (previously taken from
    # the Ferrari table, a copy-paste leftover); one extra year of room is
    # left on the right-hand side.
    limits = c(min(Mclaren_avg_rank$year), max(Mclaren_avg_rank$year) + 1),
    expand = c(0, 0.5)
  ) +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(
      color = "white",
      size = 12,
      margin = margin(b = 10)
    ),
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# McLaren average race run time over time
# The y value is the per-season average coerced to numeric and divided by
# 3,600,000 (milliseconds per hour), matching the subtitle. The solid curve
# is geom_smooth()'s default smoother; the dashed line is a linear (lm) trend.
ggplot(
  Mclaren_avg_runTime,
  aes(x = year, y = as.numeric(avg_runTime_per_season)/3600000)
) +
  geom_point(color = "#FF8000") +
  geom_smooth(color = "#47c7fc") +
  geom_smooth(
    method = lm,
    se = FALSE,
    linetype = "longdash",
    color = "#00A551"
  ) +
  labs(
    title = "McLaren Average Runtime by Season",
    subtitle = "Converted from milliseconds to hours | From 1950 to 2024",
    x = "Season",
    y = "Average Runtime (Hours)"
  ) +
  theme(
    # black canvas with dim grid lines
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    # white text on the dark background
    plot.title = element_text(color = "white", face = "bold", size = 16),
    plot.subtitle = element_text(
      color = "white",
      size = 12,
      margin = margin(b = 10)
    ),
    axis.title = element_text(color = "white"),
    axis.text = element_text(color = "white"),
    legend.title = element_text(color = "white"),
    legend.text = element_text(color = "white")
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# McLaren wins over time
# Points joined by a line show total wins per season; the dashed line is a
# linear (lm) trend without a confidence band.
Mclaren_total_wins %>%
  ggplot(aes(x = year, y = total_wins)) +
  geom_point(color = "#FF8000") +
  geom_line(color = "#47c7fc") +
  geom_smooth(
    linetype = "longdash",
    method = lm,
    se = FALSE,
    color = "#00A551"
  ) +
  theme(
    panel.background = element_rect(fill = "black"),
    plot.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray30"),
    panel.grid.minor = element_line(color = "gray20"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", face = "bold"),
    plot.subtitle = element_text(
      color = "white",
      size = 12,
      margin = margin(b = 10)
    ),
    legend.text = element_text(color = "white"),
    legend.title = element_text(color = "white")
  ) +
  # One break per win, up to the best McLaren season (previously computed
  # from the Ferrari table, a copy-paste leftover).
  scale_y_continuous(
    breaks = seq(0, max(Mclaren_total_wins$total_wins), by = 1)
  ) +
  scale_x_continuous(breaks = seq(1955, 2025, by = 5)) +
  labs(
    title = "McLaren Wins by Season",
    subtitle = "From 1958 to 2024",
    x = "Season",
    y = "Total Wins"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Comparing Ferrari and McLaren

# Each comparison table stacks the Ferrari summary on top of the McLaren
# summary and adds a Team column identifying which team a row belongs to.

# Qualifying time
qualifying_comparison <- bind_rows(
  mutate(ferrari_avg_qualifyingTime, Team = "Ferrari"),
  mutate(Mclaren_avg_qualifyingTime, Team = "McLaren")
)

# Fastest lap time
lapTime_comparison <- bind_rows(
  mutate(ferrari_avg_LapTime, Team = "Ferrari"),
  mutate(Mclaren_avg_LapTime, Team = "McLaren")
)

# Fastest lap speed
speed_comparison <- bind_rows(
  mutate(ferrari_avg_fastestLapSpeed, Team = "Ferrari"),
  mutate(Mclaren_avg_fastestLapSpeed, Team = "McLaren")
)

# Finish position
finish_comparison <- bind_rows(
  mutate(ferrari_avg_finishPosition, Team = "Ferrari"),
  mutate(Mclaren_avg_finishPosition, Team = "McLaren")
)

# Rank
rank_comparison <- bind_rows(
  mutate(ferrari_avg_rank, Team = "Ferrari"),
  mutate(Mclaren_avg_rank, Team = "McLaren")
)

# Wins
wins_comparison <- bind_rows(
  mutate(ferrari_total_wins, Team = "Ferrari"),
  mutate(Mclaren_total_wins, Team = "McLaren")
)

# Team colours, keyed by the values of the Team column
team_colors <- c(Ferrari = "#EF1A2D", McLaren = "#47C7FC")

# Shared dark theme, defined once so each comparison plot can add `theme_f1`
# instead of repeating the same theme() call
theme_f1 <- theme(
  # black canvas (plot, panel and legend) with dim grid lines
  plot.background = element_rect(fill = "black"),
  panel.background = element_rect(fill = "black"),
  legend.background = element_rect(fill = "black"),
  panel.grid.major = element_line(color = "gray30"),
  panel.grid.minor = element_line(color = "gray20"),
  # white text on the dark background
  plot.title = element_text(color = "white", face = "bold", size = 16),
  plot.subtitle = element_text(
    color = "white",
    size = 12,
    margin = margin(b = 10)
  ),
  axis.title = element_text(color = "white"),
  axis.text = element_text(color = "white"),
  legend.title = element_text(color = "white"),
  legend.text = element_text(color = "white")
)

# Qualifying time comparison
# One point per team and season plus a per-team smoother (no confidence band).
# The y axis formatter treats the values as seconds and prints them as mm:ss
# (NOTE(review): assumes avg_time_per_season is in seconds - confirm).
qualifying_comparison %>%
  ggplot(aes(x = year, y = avg_time_per_season, color = Team)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    title = "Ferrari vs McLaren: Average Qualifying Time",
    subtitle = "Lower is better | 1994-2024",
    x = "Season",
    y = "Average Qualifying Time (min:ss)",
    color = "Team"
  ) +
  scale_color_manual(values = team_colors) +
  scale_y_continuous(
    labels = function(x) {
      # Round to whole seconds BEFORE splitting into minutes and seconds;
      # rounding the remainder alone could print e.g. 119.7 as "01:60".
      total_seconds <- round(x)
      sprintf("%02d:%02d", total_seconds %/% 60, total_seconds %% 60)
    }
  ) +
  theme_f1
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Fastest lap time comparison
# One point per team and season plus a per-team smoother (no confidence band).
# The y axis formatter treats the values as seconds and prints them as mm:ss
# (NOTE(review): assumes avg_fastestLapTime_per_season is in seconds - confirm).
lapTime_comparison %>%
  ggplot(aes(x = year, y = avg_fastestLapTime_per_season, color = Team)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    title = "Ferrari vs McLaren: Fastest Lap Time",
    subtitle = "Lower is better",
    x = "Season",
    y = "Average Fastest Lap Time (min:ss)",
    color = "Team"
  ) +
  scale_color_manual(values = team_colors) +
  scale_y_continuous(
    labels = function(x) {
      # Round to whole seconds BEFORE splitting into minutes and seconds;
      # rounding the remainder alone could print e.g. 119.7 as "01:60".
      total_seconds <- round(x)
      sprintf("%02d:%02d", total_seconds %/% 60, total_seconds %% 60)
    }
  ) +
  theme_f1
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Fastest lap speed comparison
# One point per team and season plus a per-team smoother (no confidence band)
ggplot(
  speed_comparison,
  aes(x = year, y = avg_fastestLapSpeed_per_season, color = Team)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_color_manual(values = team_colors) +
  labs(
    title = "Ferrari vs McLaren: Fastest Lap Speed",
    subtitle = "Higher is better",
    x = "Season",
    y = "Average Speed (km/h)",
    color = "Team"
  ) +
  theme_f1
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Finish position comparison
# One point per team and season plus a per-team smoother (no confidence band)
ggplot(
  finish_comparison,
  aes(x = year, y = avg_finishPosition_per_season, color = Team)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_color_manual(values = team_colors) +
  labs(
    title = "Ferrari vs McLaren: Average Finish Position",
    subtitle = "Lower is better",
    x = "Season",
    y = "Average Position",
    color = "Team"
  ) +
  theme_f1
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Rank comparison
# One point per team and season plus a per-team smoother (no confidence band)
ggplot(
  rank_comparison,
  aes(x = year, y = avg_rank_per_season, color = Team)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_color_manual(values = team_colors) +
  labs(
    title = "Ferrari vs McLaren: Average Rank",
    subtitle = "Lower is better",
    x = "Season",
    y = "Average Rank",
    color = "Team"
  ) +
  theme_f1
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Wins comparison
# One point per team and season plus a per-team smoother (no confidence band)
ggplot(
  wins_comparison,
  aes(x = year, y = total_wins, color = Team)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_color_manual(values = team_colors) +
  labs(
    title = "Ferrari vs McLaren: Wins by Season",
    subtitle = "Higher is better",
    x = "Season",
    y = "Total Wins",
    color = "Team"
  ) +
  theme_f1
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'